#Appendix A1
#Check dependent variable is normal.
boxplot(childdata2$obesity_rate,main="Boxplot of obesity rate")
#Appendix A2
#Check transformations of dependent variable and see which one will be normal.
symbox(~`obesity_rate`, childdata2, na.rm=T, powers=seq(-3,3,by=.5),main="Types of transformation",ylab="Obesity rate", xlab="Power of transformation")
#Based on this, use square root instead.
childdata2$root_ob<- sqrt(childdata2$obesity_rate)
boxplot(childdata2$root_ob,main="Boxplot of transformed obesity rate")
#Join main dataset and map data
#Appendix Table 1: Univariate regression
#Variable selection.
#Univariate regression first.
model_1 <- lm(root_ob ~ `Density of fast food outlets`, data = modeldata)#significant
summary(model_1)
##
## Call:
## lm(formula = root_ob ~ `Density of fast food outlets`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.27677 -0.29938 -0.00988 0.29868 1.45546
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.6948563 0.0842665 43.85 <2e-16 ***
## `Density of fast food outlets` 0.0127375 0.0009756 13.06 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4625 on 313 degrees of freedom
## Multiple R-squared: 0.3526, Adjusted R-squared: 0.3505
## F-statistic: 170.5 on 1 and 313 DF, p-value: < 2.2e-16
model_2 <- lm(root_ob ~ `Active rate`, data = modeldata)#significant
summary(model_2)
##
## Call:
## lm(formula = root_ob ~ `Active rate`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5186 -0.3957 -0.0229 0.4089 1.4139
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.8446 0.2481 23.558 < 2e-16 ***
## `Active rate` -2.3662 0.5207 -4.544 8.38e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5489 on 266 degrees of freedom
## (47 observations deleted due to missingness)
## Multiple R-squared: 0.07204, Adjusted R-squared: 0.06855
## F-statistic: 20.65 on 1 and 266 DF, p-value: 8.378e-06
model_3 <- lm(root_ob ~ `Percentage of White children`, data = modeldata)#significant
summary(model_3)
##
## Call:
## lm(formula = root_ob ~ `Percentage of White children`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.33103 -0.35594 0.01714 0.34855 1.23013
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.1290 0.1569 39.073 <2e-16 ***
## `Percentage of White children` -1.6286 0.1809 -9.001 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5123 on 313 degrees of freedom
## Multiple R-squared: 0.2056, Adjusted R-squared: 0.2031
## F-statistic: 81.02 on 1 and 313 DF, p-value: < 2.2e-16
model_4 <- lm(root_ob ~ `Percentage of Mixed children`, data = modeldata)#significant
summary(model_4)
##
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.42548 -0.41214 0.01901 0.44044 1.19231
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.51037 0.05736 78.630 < 2e-16 ***
## `Percentage of Mixed children` 5.98789 1.24793 4.798 2.48e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5547 on 313 degrees of freedom
## Multiple R-squared: 0.06852, Adjusted R-squared: 0.06554
## F-statistic: 23.02 on 1 and 313 DF, p-value: 2.481e-06
model_5 <- lm(root_ob ~ `Percentage of Asian children`, data = modeldata)#significant
summary(model_5)
##
## Call:
## lm(formula = root_ob ~ `Percentage of Asian children`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.35098 -0.39116 0.00483 0.37521 1.20279
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.56331 0.03763 121.253 < 2e-16 ***
## `Percentage of Asian children` 2.50436 0.32550 7.694 1.87e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5271 on 313 degrees of freedom
## Multiple R-squared: 0.159, Adjusted R-squared: 0.1564
## F-statistic: 59.2 on 1 and 313 DF, p-value: 1.874e-13
model_6 <- lm(root_ob ~ `Percentage of African children`, data = modeldata)#significant
summary(model_6)
##
## Call:
## lm(formula = root_ob ~ `Percentage of African children`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.37075 -0.40101 0.00913 0.40737 1.21564
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.61152 0.03298 139.840 < 2e-16 ***
## `Percentage of African children` 4.38738 0.51796 8.471 9.66e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5184 on 313 degrees of freedom
## Multiple R-squared: 0.1865, Adjusted R-squared: 0.1839
## F-statistic: 71.75 on 1 and 313 DF, p-value: 9.661e-16
model_7 <- lm(root_ob ~ `IMD income average score`, data = modeldata)#significant
summary(model_7)
##
## Call:
## lm(formula = root_ob ~ `IMD income average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.83874 -0.20839 0.01143 0.19565 0.90327
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.53386 0.05152 68.60 <2e-16 ***
## `IMD income average score` 10.38165 0.41299 25.14 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3308 on 313 degrees of freedom
## Multiple R-squared: 0.6688, Adjusted R-squared: 0.6677
## F-statistic: 631.9 on 1 and 313 DF, p-value: < 2.2e-16
model_8 <- lm(root_ob ~ `IMD employment average score`, data = modeldata)#significant
summary(model_8)
##
## Call:
## lm(formula = root_ob ~ `IMD employment average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.95236 -0.26248 -0.01322 0.21784 1.14273
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.63327 0.06408 56.70 <2e-16 ***
## `IMD employment average score` 12.00710 0.65060 18.46 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3978 on 313 degrees of freedom
## Multiple R-squared: 0.5211, Adjusted R-squared: 0.5196
## F-statistic: 340.6 on 1 and 313 DF, p-value: < 2.2e-16
model_9 <- lm(root_ob ~ `IMD education average score`, data = modeldata)#significant
summary(model_9)
##
## Call:
## lm(formula = root_ob ~ `IMD education average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.92965 -0.30765 -0.06097 0.22807 1.38491
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.866392 0.064383 60.05 <2e-16 ***
## `IMD education average score` 0.041999 0.002851 14.73 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4417 on 313 degrees of freedom
## Multiple R-squared: 0.4095, Adjusted R-squared: 0.4076
## F-statistic: 217 on 1 and 313 DF, p-value: < 2.2e-16
model_10 <- lm(root_ob ~ `IMD health average score`, data = modeldata)#significant
summary(model_10)
##
## Call:
## lm(formula = root_ob ~ `IMD health average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.96863 -0.24078 -0.00531 0.19580 1.36286
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.82447 0.02311 208.74 <2e-16 ***
## `IMD health average score` 0.64498 0.03567 18.08 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.402 on 313 degrees of freedom
## Multiple R-squared: 0.5109, Adjusted R-squared: 0.5094
## F-statistic: 327 on 1 and 313 DF, p-value: < 2.2e-16
model_11 <- lm(root_ob ~ `IMD crime average score`, data = modeldata)#significant
summary(model_11)
##
## Call:
## lm(formula = root_ob ~ `IMD crime average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.23527 -0.27003 -0.00924 0.28329 1.31520
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.83803 0.02583 187.33 <2e-16 ***
## `IMD crime average score` 0.71579 0.04895 14.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.443 on 313 degrees of freedom
## Multiple R-squared: 0.4059, Adjusted R-squared: 0.404
## F-statistic: 213.8 on 1 and 313 DF, p-value: < 2.2e-16
model_12 <- lm(root_ob ~ `IMD housing average score`, data = modeldata)#insignificant
summary(model_12)
##
## Call:
## lm(formula = root_ob ~ `IMD housing average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.46163 -0.46095 0.01413 0.44805 1.42131
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.639841 0.118212 39.250 <2e-16 ***
## `IMD housing average score` 0.004669 0.005240 0.891 0.374
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5741 on 313 degrees of freedom
## Multiple R-squared: 0.002531, Adjusted R-squared: -0.0006562
## F-statistic: 0.7941 on 1 and 313 DF, p-value: 0.3735
model_13 <- lm(root_ob ~ `IMD living average score`, data = modeldata)#significant
summary(model_13)
##
## Call:
## lm(formula = root_ob ~ `IMD living average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.31999 -0.40801 0.01659 0.39484 1.30631
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.317863 0.078347 55.112 < 2e-16 ***
## `IMD living average score` 0.020840 0.003548 5.874 1.09e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5455 on 313 degrees of freedom
## Multiple R-squared: 0.09928, Adjusted R-squared: 0.09641
## F-statistic: 34.5 on 1 and 313 DF, p-value: 1.088e-08
model_14 <- lm(root_ob ~ `IMD IDACI average score`, data = modeldata)#significant
summary(model_14)
##
## Call:
## lm(formula = root_ob ~ `IMD IDACI average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.76574 -0.21304 -0.00681 0.20497 0.96490
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.52748 0.05158 68.38 <2e-16 ***
## `IMD IDACI average score` 7.91058 0.31361 25.22 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3301 on 313 degrees of freedom
## Multiple R-squared: 0.6703, Adjusted R-squared: 0.6692
## F-statistic: 636.3 on 1 and 313 DF, p-value: < 2.2e-16
model_15 <- lm(root_ob ~ `Fairly active rate`, data = modeldata)#insignificant
summary(model_15)
##
## Call:
## lm(formula = root_ob ~ `Fairly active rate`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4100 -0.4365 -0.0132 0.4096 1.4411
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.9832 0.2022 24.641 <2e-16 ***
## `Fairly active rate` -1.0781 0.8308 -1.298 0.196
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.567 on 265 degrees of freedom
## (48 observations deleted due to missingness)
## Multiple R-squared: 0.006315, Adjusted R-squared: 0.002565
## F-statistic: 1.684 on 1 and 265 DF, p-value: 0.1955
model_16 <- lm(root_ob ~ `Less active rate`, data = modeldata)#significant
summary(model_16)
##
## Call:
## lm(formula = root_ob ~ `Less active rate`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.40039 -0.38736 -0.04242 0.38638 1.43481
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.7469 0.1612 23.24 < 2e-16 ***
## `Less active rate` 3.3987 0.5473 6.21 2.03e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5325 on 266 degrees of freedom
## (47 observations deleted due to missingness)
## Multiple R-squared: 0.1266, Adjusted R-squared: 0.1233
## F-statistic: 38.56 on 1 and 266 DF, p-value: 2.03e-09
#Check correlation of model variables.
##Figure 2. #visualise the correlation matrix
#visualise the correlation matrix
ggcorrplot(corr, hc.order = TRUE, type = "lower",
outline.col = "white",tl.cex = 11,
colors = c("#6D9EC1", "white", "#E46726"))
#Final model selection based on F test, adjusted R etc.
#Final model selection based on F test, adjusted R etc.
model_a <-lm(root_ob ~`Density of fast food outlets`+ `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+ `IMD education average score`+ `IMD health average score`+ `IMD crime average score`+`IMD living average score`+ `IMD IDACI average score`+`Less active rate`, data = modeldata)
summary(model_a)
##
## Call:
## lm(formula = root_ob ~ `Density of fast food outlets` + `Percentage of Mixed children` +
## `Percentage of Asian children` + `Percentage of African children` +
## `IMD education average score` + `IMD health average score` +
## `IMD crime average score` + `IMD living average score` +
## `IMD IDACI average score` + `Less active rate`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.78546 -0.19218 0.00912 0.18340 0.74284
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.015791 0.190873 21.039 < 2e-16 ***
## `Density of fast food outlets` -0.001369 0.001109 -1.235 0.217986
## `Percentage of Mixed children` -4.626314 1.398842 -3.307 0.001077 **
## `Percentage of Asian children` 1.362550 0.265761 5.127 5.8e-07 ***
## `Percentage of African children` 5.057605 0.589319 8.582 9.0e-16 ***
## `IMD education average score` 0.016403 0.004176 3.928 0.000110 ***
## `IMD health average score` 0.253100 0.072771 3.478 0.000593 ***
## `IMD crime average score` -0.053869 0.057342 -0.939 0.348392
## `IMD living average score` -0.004143 0.002324 -1.783 0.075783 .
## `IMD IDACI average score` 2.928125 0.897886 3.261 0.001260 **
## `Less active rate` 0.306062 0.304036 1.007 0.315043
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2655 on 257 degrees of freedom
## (47 observations deleted due to missingness)
## Multiple R-squared: 0.7902, Adjusted R-squared: 0.782
## F-statistic: 96.8 on 10 and 257 DF, p-value: < 2.2e-16
#Remove less active rate
model_b <-lm(root_ob ~`Density of fast food outlets`+ `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+ `IMD education average score`+ `IMD health average score`+ `IMD crime average score`+`IMD living average score`+ `IMD IDACI average score`, data = modeldata)
summary(model_b)
##
## Call:
## lm(formula = root_ob ~ `Density of fast food outlets` + `Percentage of Mixed children` +
## `Percentage of Asian children` + `Percentage of African children` +
## `IMD education average score` + `IMD health average score` +
## `IMD crime average score` + `IMD living average score` +
## `IMD IDACI average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.73966 -0.18434 -0.01111 0.18002 1.23216
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.866e+00 1.467e-01 26.345 < 2e-16 ***
## `Density of fast food outlets` -5.435e-05 9.435e-04 -0.058 0.954099
## `Percentage of Mixed children` -3.148e+00 1.348e+00 -2.336 0.020165 *
## `Percentage of Asian children` 1.171e+00 2.258e-01 5.188 3.89e-07 ***
## `Percentage of African children` 4.485e+00 5.398e-01 8.309 3.21e-15 ***
## `IMD education average score` 1.997e-02 3.791e-03 5.268 2.61e-07 ***
## `IMD health average score` 1.730e-01 6.652e-02 2.601 0.009743 **
## `IMD crime average score` -3.687e-02 5.348e-02 -0.689 0.491088
## `IMD living average score` -3.138e-03 2.183e-03 -1.438 0.151476
## `IMD IDACI average score` 2.936e+00 8.213e-01 3.575 0.000406 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2779 on 305 degrees of freedom
## Multiple R-squared: 0.7722, Adjusted R-squared: 0.7655
## F-statistic: 114.9 on 9 and 305 DF, p-value: < 2.2e-16
#Remove fast food shop density
model_c <-lm(root_ob ~ `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+ `IMD education average score`+ `IMD health average score`+ `IMD crime average score`+`IMD living average score`+ `IMD IDACI average score`, data = modeldata)
summary(model_c)
##
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` +
## `Percentage of African children` + `IMD education average score` +
## `IMD health average score` + `IMD crime average score` +
## `IMD living average score` + `IMD IDACI average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7391 -0.1841 -0.0108 0.1802 1.2335
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.862160 0.131781 29.307 < 2e-16 ***
## `Percentage of Mixed children` -3.157068 1.335894 -2.363 0.018740 *
## `Percentage of Asian children` 1.170565 0.225049 5.201 3.63e-07 ***
## `Percentage of African children` 4.488435 0.536011 8.374 2.03e-15 ***
## `IMD education average score` 0.020010 0.003727 5.368 1.57e-07 ***
## `IMD health average score` 0.171782 0.062841 2.734 0.006629 **
## `IMD crime average score` -0.037292 0.052893 -0.705 0.481324
## `IMD living average score` -0.003164 0.002132 -1.484 0.138755
## `IMD IDACI average score` 2.930233 0.813056 3.604 0.000366 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2774 on 306 degrees of freedom
## Multiple R-squared: 0.7722, Adjusted R-squared: 0.7663
## F-statistic: 129.7 on 8 and 306 DF, p-value: < 2.2e-16
#Remove living average score
model_d <-lm(root_ob ~ `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+ `IMD education average score`+ `IMD health average score`+ `IMD crime average score`+ `IMD IDACI average score`, data = modeldata)
summary(model_d)
##
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` +
## `Percentage of African children` + `IMD education average score` +
## `IMD health average score` + `IMD crime average score` +
## `IMD IDACI average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.71569 -0.18310 -0.00798 0.18331 1.16380
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.812813 0.127768 29.842 < 2e-16 ***
## `Percentage of Mixed children` -3.215825 1.337921 -2.404 0.016827 *
## `Percentage of Asian children` 1.103218 0.220859 4.995 9.89e-07 ***
## `Percentage of African children` 4.396905 0.533494 8.242 4.99e-15 ***
## `IMD education average score` 0.020879 0.003688 5.661 3.45e-08 ***
## `IMD health average score` 0.156840 0.062151 2.524 0.012122 *
## `IMD crime average score` -0.022629 0.052065 -0.435 0.664138
## `IMD IDACI average score` 2.778762 0.808206 3.438 0.000667 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.278 on 307 degrees of freedom
## Multiple R-squared: 0.7706, Adjusted R-squared: 0.7653
## F-statistic: 147.3 on 7 and 307 DF, p-value: < 2.2e-16
#Remove crime average score instead of living average score
model_e <-lm(root_ob ~ `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+ `IMD education average score`+ `IMD health average score`+ `IMD living average score`+ `IMD IDACI average score`, data = modeldata)
summary(model_e)
##
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` +
## `Percentage of African children` + `IMD education average score` +
## `IMD health average score` + `IMD living average score` +
## `IMD IDACI average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7369 -0.1846 -0.0076 0.1777 1.2215
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.904688 0.117068 33.354 < 2e-16 ***
## `Percentage of Mixed children` -3.420394 1.281570 -2.669 0.008014 **
## `Percentage of Asian children` 1.136345 0.219573 5.175 4.12e-07 ***
## `Percentage of African children` 4.526474 0.532851 8.495 8.68e-16 ***
## `IMD education average score` 0.019997 0.003724 5.370 1.56e-07 ***
## `IMD health average score` 0.169717 0.062721 2.706 0.007193 **
## `IMD living average score` -0.002884 0.002093 -1.378 0.169209
## `IMD IDACI average score` 2.723392 0.757660 3.594 0.000379 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2772 on 307 degrees of freedom
## Multiple R-squared: 0.7718, Adjusted R-squared: 0.7666
## F-statistic: 148.4 on 7 and 307 DF, p-value: < 2.2e-16
#Remove crime average score and living average score
model_f <-lm(root_ob ~ `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+ `IMD education average score`+ `IMD health average score` + `IMD IDACI average score`, data = modeldata)
summary(model_f)
##
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` +
## `Percentage of African children` + `IMD education average score` +
## `IMD health average score` + `IMD IDACI average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.71559 -0.18097 -0.01082 0.18078 1.16015
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.842304 0.108118 35.538 < 2e-16 ***
## `Percentage of Mixed children` -3.378109 1.283071 -2.633 0.008895 **
## `Percentage of Asian children` 1.085460 0.216761 5.008 9.29e-07 ***
## `Percentage of African children` 4.425927 0.528602 8.373 2.00e-15 ***
## `IMD education average score` 0.020823 0.003681 5.657 3.52e-08 ***
## `IMD health average score` 0.156375 0.062060 2.520 0.012249 *
## `IMD IDACI average score` 2.657163 0.757236 3.509 0.000517 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2776 on 308 degrees of freedom
## Multiple R-squared: 0.7704, Adjusted R-squared: 0.766
## F-statistic: 172.3 on 6 and 308 DF, p-value: < 2.2e-16
#Stepwise AIC and best subset variable selection (reference only).
# stepwise aic regression
#Run for all variables
ols_step_both_aic(model_a)
##
##
## Stepwise Summary
## ---------------------------------------------------------------------------------------------------
## Variable Method AIC RSS Sum Sq R-Sq Adj. R-Sq
## ---------------------------------------------------------------------------------------------------
## `IMD IDACI average score` addition 199.568 34.097 69.312 0.67027 0.66922
## `Less active rate` addition 155.185 27.175 59.176 0.68529 0.68292
## `Percentage of African children` addition 117.903 23.470 62.881 0.72820 0.72511
## `IMD education average score` addition 96.236 21.486 64.865 0.75117 0.74739
## `IMD health average score` addition 83.766 20.357 65.994 0.76425 0.75975
## `Percentage of Asian children` addition 74.845 19.544 66.807 0.77367 0.76846
## `Percentage of Mixed children` addition 63.230 18.576 67.775 0.78488 0.77908
## `IMD living average score` addition 61.559 18.323 68.028 0.78780 0.78125
## `Density of fast food outlets` addition 61.427 18.178 68.173 0.78948 0.78214
## ---------------------------------------------------------------------------------------------------
k <- ols_step_both_aic(model_a)
ols_step_both_aic(model_a, details = FALSE)
##
##
## Stepwise Summary
## ---------------------------------------------------------------------------------------------------
## Variable Method AIC RSS Sum Sq R-Sq Adj. R-Sq
## ---------------------------------------------------------------------------------------------------
## `IMD IDACI average score` addition 199.568 34.097 69.312 0.67027 0.66922
## `Less active rate` addition 155.185 27.175 59.176 0.68529 0.68292
## `Percentage of African children` addition 117.903 23.470 62.881 0.72820 0.72511
## `IMD education average score` addition 96.236 21.486 64.865 0.75117 0.74739
## `IMD health average score` addition 83.766 20.357 65.994 0.76425 0.75975
## `Percentage of Asian children` addition 74.845 19.544 66.807 0.77367 0.76846
## `Percentage of Mixed children` addition 63.230 18.576 67.775 0.78488 0.77908
## `IMD living average score` addition 61.559 18.323 68.028 0.78780 0.78125
## `Density of fast food outlets` addition 61.427 18.178 68.173 0.78948 0.78214
## ---------------------------------------------------------------------------------------------------
ols_step_best_subset(model_a)
#Run for model f
ols_step_both_aic(model_f)
##
##
## Stepwise Summary
## ---------------------------------------------------------------------------------------------------
## Variable Method AIC RSS Sum Sq R-Sq Adj. R-Sq
## ---------------------------------------------------------------------------------------------------
## `IMD IDACI average score` addition 199.568 34.097 69.312 0.67027 0.66922
## `Percentage of African children` addition 161.543 30.028 73.380 0.70962 0.70776
## `IMD education average score` addition 123.944 26.481 76.928 0.74392 0.74145
## `Percentage of Asian children` addition 109.707 25.150 78.258 0.75679 0.75365
## `Percentage of Mixed children` addition 99.939 24.228 79.180 0.76570 0.76191
## `IMD health average score` addition 95.512 23.739 79.670 0.77044 0.76596
## ---------------------------------------------------------------------------------------------------
k <- ols_step_both_aic(model_f)
ols_step_both_aic(model_f, details = FALSE)
##
##
## Stepwise Summary
## ---------------------------------------------------------------------------------------------------
## Variable Method AIC RSS Sum Sq R-Sq Adj. R-Sq
## ---------------------------------------------------------------------------------------------------
## `IMD IDACI average score` addition 199.568 34.097 69.312 0.67027 0.66922
## `Percentage of African children` addition 161.543 30.028 73.380 0.70962 0.70776
## `IMD education average score` addition 123.944 26.481 76.928 0.74392 0.74145
## `Percentage of Asian children` addition 109.707 25.150 78.258 0.75679 0.75365
## `Percentage of Mixed children` addition 99.939 24.228 79.180 0.76570 0.76191
## `IMD health average score` addition 95.512 23.739 79.670 0.77044 0.76596
## ---------------------------------------------------------------------------------------------------
ols_step_best_subset(model_f)
#save the residuals into dataframe
finalmodel<- lm(root_ob ~ `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+ `IMD education average score`+ `IMD health average score` + `IMD IDACI average score`, data = modeldata)
summary(finalmodel)
##
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` +
## `Percentage of African children` + `IMD education average score` +
## `IMD health average score` + `IMD IDACI average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.71559 -0.18097 -0.01082 0.18078 1.16015
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.842304 0.108118 35.538 < 2e-16 ***
## `Percentage of Mixed children` -3.378109 1.283071 -2.633 0.008895 **
## `Percentage of Asian children` 1.085460 0.216761 5.008 9.29e-07 ***
## `Percentage of African children` 4.425927 0.528602 8.373 2.00e-15 ***
## `IMD education average score` 0.020823 0.003681 5.657 3.52e-08 ***
## `IMD health average score` 0.156375 0.062060 2.520 0.012249 *
## `IMD IDACI average score` 2.657163 0.757236 3.509 0.000517 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2776 on 308 degrees of freedom
## Multiple R-squared: 0.7704, Adjusted R-squared: 0.766
## F-statistic: 172.3 on 6 and 308 DF, p-value: < 2.2e-16
#Check multicollinearity
vif(finalmodel)
## `Percentage of Mixed children` `Percentage of Asian children`
## 4.220859 1.598609
## `Percentage of African children` `IMD education average score`
## 3.631903 4.220017
## `IMD health average score` `IMD IDACI average score`
## 6.346407 8.240346
ObesityMAP$`Model residuals`<- finalmodel$residuals
#Figure 3. Diagnostic plots for OLS regression model
#residual plot
qplot(finalmodel$`Model residuals`) + geom_histogram()
par(mfrow = c(2, 2)) # Split the plotting panel into a 2 x 2 grid
plot(finalmodel) # Plot the model residuals
#Cross-validation of model
###Cross-validation of model
#####Cross-Validation
set.seed(48)
regressdata<-modeldata[,c("Percentage of Mixed children","Percentage of Asian children",
"Percentage of African children","IMD education average score",
"IMD health average score","IMD IDACI average score",
"root_ob" )]
model<- train(root_ob ~ `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+ `IMD education average score`+ `IMD health average score` + `IMD IDACI average score`,
regressdata,
method="lm",
trControl=trainControl(method="repeatedcv",
number=10,
verboseIter=TRUE))
## + Fold01.Rep1: intercept=TRUE
## - Fold01.Rep1: intercept=TRUE
## + Fold02.Rep1: intercept=TRUE
## - Fold02.Rep1: intercept=TRUE
## + Fold03.Rep1: intercept=TRUE
## - Fold03.Rep1: intercept=TRUE
## + Fold04.Rep1: intercept=TRUE
## - Fold04.Rep1: intercept=TRUE
## + Fold05.Rep1: intercept=TRUE
## - Fold05.Rep1: intercept=TRUE
## + Fold06.Rep1: intercept=TRUE
## - Fold06.Rep1: intercept=TRUE
## + Fold07.Rep1: intercept=TRUE
## - Fold07.Rep1: intercept=TRUE
## + Fold08.Rep1: intercept=TRUE
## - Fold08.Rep1: intercept=TRUE
## + Fold09.Rep1: intercept=TRUE
## - Fold09.Rep1: intercept=TRUE
## + Fold10.Rep1: intercept=TRUE
## - Fold10.Rep1: intercept=TRUE
## Aggregating results
## Fitting final model on full training set
summary(model)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.71559 -0.18097 -0.01082 0.18078 1.16015
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.842304 0.108118 35.538 < 2e-16
## `\\`Percentage of Mixed children\\`` -3.378109 1.283071 -2.633 0.008895
## `\\`Percentage of Asian children\\`` 1.085460 0.216761 5.008 9.29e-07
## `\\`Percentage of African children\\`` 4.425927 0.528602 8.373 2.00e-15
## `\\`IMD education average score\\`` 0.020823 0.003681 5.657 3.52e-08
## `\\`IMD health average score\\`` 0.156375 0.062060 2.520 0.012249
## `\\`IMD IDACI average score\\`` 2.657163 0.757236 3.509 0.000517
##
## (Intercept) ***
## `\\`Percentage of Mixed children\\`` **
## `\\`Percentage of Asian children\\`` ***
## `\\`Percentage of African children\\`` ***
## `\\`IMD education average score\\`` ***
## `\\`IMD health average score\\`` *
## `\\`IMD IDACI average score\\`` ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2776 on 308 degrees of freedom
## Multiple R-squared: 0.7704, Adjusted R-squared: 0.766
## F-statistic: 172.3 on 6 and 308 DF, p-value: < 2.2e-16
#print R2, MAE and RMSE
#print R2, MAE and RMSE
print(model)
## Linear Regression
##
## 315 samples
## 6 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 1 times)
## Summary of sample sizes: 283, 284, 283, 284, 283, 283, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 0.2795769 0.7716322 0.2217382
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
#Now check for spatial auto-correlation
ObesityMAP <- as(ObesityMAP,"Spatial")
names(ObesityMAP)
## [1] "GSS_CODE"
## [2] "FID"
## [3] "LAD19NM"
## [4] "LAD19NMW"
## [5] "BNG_E"
## [6] "BNG_N"
## [7] "LONG"
## [8] "LAT"
## [9] "Shape__Area"
## [10] "Shape__Length"
## [11] "obesity_rate"
## [12] "Density.of.fast.food.outlets"
## [13] "Active.rate"
## [14] "Percentage.of.White.children"
## [15] "Percentage.of.Mixed.children"
## [16] "Percentage.of.Asian.children"
## [17] "Percentage.of.African.children"
## [18] "Income...Average.rank"
## [19] "Employment...Average.rank"
## [20] "Education..Skills.and.Training...Average.rank"
## [21] "Health.Deprivation.and.Disability...Average.rank"
## [22] "Crime...Average.rank"
## [23] "Barriers.to.Housing.and.Services...Average.rank"
## [24] "Living.Environment...Average.rank"
## [25] "IDACI...Average.rank"
## [26] "IMD.income.average.score"
## [27] "IMD.employment.average.score"
## [28] "IMD.education.average.score"
## [29] "IMD.health.average.score"
## [30] "IMD.crime.average.score"
## [31] "IMD.housing.average.score"
## [32] "IMD.living.average.score"
## [33] "IMD.IDACI.average.score"
## [34] "Fairly.active.rate"
## [35] "Less.active.rate"
## [36] "root_ob"
## [37] "Model.residuals"
#Calculate centriod of each LA.
coordsW <- coordinates(ObesityMAP)
plot(coordsW)
#Neighbours list of queens contiguity
LWard_nb <- poly2nb(ObesityMAP, queen=T)
#and nearest neighbours
knn_wards <- knearneigh(coordsW, k=4)
LWard_knn <- knn2nb(knn_wards)
#plot and add a map
plot(LWard_nb, coordinates(coordsW), col="red")
plot(LWard_knn, coordinates(coordsW), col="blue")
plot(ObesityMAP)
#create a spatial weights matrix
Lward.queens_weight <- nb2listw(LWard_nb, style="C",zero.policy=TRUE)
Lward.knn_4_weight <- nb2listw(LWard_knn, style="C",zero.policy=TRUE)
#Run Moran’s I using the residuals from our final model #first using queens neighbours
moran.test(ObesityMAP@data$`Model.residuals`, Lward.queens_weight,zero.policy=TRUE)
##
## Moran I test under randomisation
##
## data: ObesityMAP@data$Model.residuals
## weights: Lward.queens_weight n reduced by no-neighbour observations
##
##
## Moran I statistic standard deviate = 7.9544, p-value = 9e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.271642496 -0.003194888 0.001193815
#Then knn = 4
moran.test(ObesityMAP@data$`Model.residuals`, Lward.knn_4_weight,zero.policy=TRUE)
##
## Moran I test under randomisation
##
## data: ObesityMAP@data$Model.residuals
## weights: Lward.knn_4_weight
##
## Moran I statistic standard deviate = 7.8037, p-value = 3.005e-15
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.285285412 -0.003184713 0.001366461
##GWR
#calculate kernel bandwidth
GWRbandwidth <- gwr.sel(root_ob ~ `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+ `IMD education average score`+ `IMD health average score` + `IMD IDACI average score`,data = modeldata,coords=coordsW,adapt=T)
## Adaptive q: 0.381966 CV score: 22.7907
## Adaptive q: 0.618034 CV score: 23.71091
## Adaptive q: 0.236068 CV score: 22.32975
## Adaptive q: 0.145898 CV score: 22.26535
## Adaptive q: 0.1565004 CV score: 22.27587
## Adaptive q: 0.09016994 CV score: 21.82901
## Adaptive q: 0.05572809 CV score: 21.04139
## Adaptive q: 0.03444185 CV score: 20.49357
## Adaptive q: 0.02128624 CV score: 22.45724
## Adaptive q: 0.04257247 CV score: 20.67178
## Adaptive q: 0.02941686 CV score: 20.76158
## Adaptive q: 0.03659131 CV score: 20.55597
## Adaptive q: 0.03252248 CV score: 20.55079
## Adaptive q: 0.03451283 CV score: 20.49199
## Adaptive q: 0.03530674 CV score: 20.49814
## Adaptive q: 0.03479792 CV score: 20.48601
## Adaptive q: 0.03499227 CV score: 20.4862
## Adaptive q: 0.03488435 CV score: 20.48431
## Adaptive q: 0.03492504 CV score: 20.48376
## Adaptive q: 0.03492504 CV score: 20.48376
#run the gwr model
gwr.model = gwr(root_ob ~ `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+ `IMD education average score`+ `IMD health average score` + `IMD IDACI average score`,data = modeldata,coords=coordsW, adapt=GWRbandwidth, hatmatrix=TRUE, se.fit=TRUE)
#print the results of the model
gwr.model
## Call:
## gwr(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` +
## `Percentage of African children` + `IMD education average score` +
## `IMD health average score` + `IMD IDACI average score`, data = modeldata,
## coords = coordsW, adapt = GWRbandwidth, hatmatrix = TRUE,
## se.fit = TRUE)
## Kernel function: gwr.Gauss
## Adaptive quantile: 0.03492504 (about 11 of 315 data points)
## Summary of GWR coefficient estimates at data points:
## Min. 1st Qu. Median
## X.Intercept. 2.2379325 2.9897034 3.4863385
## X.Percentage.of.Mixed.children. -22.5861202 -3.5869974 -0.8864863
## X.Percentage.of.Asian.children. -2.5524665 0.2057791 1.1725359
## X.Percentage.of.African.children. -8.5687184 2.1280087 3.3279073
## X.IMD.education.average.score. -0.0072453 0.0185664 0.0262770
## X.IMD.health.average.score. -0.6729346 -0.1458490 -0.0352223
## X.IMD.IDACI.average.score. -1.1677115 2.9253575 4.3309386
## 3rd Qu. Max. Global
## X.Intercept. 3.9000246 4.5576890 3.8423
## X.Percentage.of.Mixed.children. 1.4186154 14.1164053 -3.3781
## X.Percentage.of.Asian.children. 1.5801901 2.7330463 1.0855
## X.Percentage.of.African.children. 4.1428690 10.3270702 4.4259
## X.IMD.education.average.score. 0.0332300 0.0485405 0.0208
## X.IMD.health.average.score. 0.0863856 0.3934210 0.1564
## X.IMD.IDACI.average.score. 5.3997008 9.6079104 2.6572
## Number of data points: 315
## Effective number of parameters (residual: 2traceS - traceS'S): 92.88358
## Effective degrees of freedom (residual: 2traceS - traceS'S): 222.1164
## Sigma (residual: 2traceS - traceS'S): 0.2308206
## Effective number of parameters (model: traceS): 68.62676
## Effective degrees of freedom (model: traceS): 246.3732
## Sigma (model: traceS): 0.2191634
## Sigma (ML): 0.1938249
## AICc (GWR p. 61, eq 2.33; p. 96, eq. 4.21): 39.72662
## AIC (GWR p. 96, eq. 4.22): -71.14606
## Residual sum of squares: 11.83395
## Quasi-global R2: 0.8855612
#Attach coefficients to original dataframe
results<-as.data.frame(gwr.model$SDF)
names(results)
## [1] "sum.w"
## [2] "X.Intercept."
## [3] "X.Percentage.of.Mixed.children."
## [4] "X.Percentage.of.Asian.children."
## [5] "X.Percentage.of.African.children."
## [6] "X.IMD.education.average.score."
## [7] "X.IMD.health.average.score."
## [8] "X.IMD.IDACI.average.score."
## [9] "X.Intercept._se"
## [10] "X.Percentage.of.Mixed.children._se"
## [11] "X.Percentage.of.Asian.children._se"
## [12] "X.Percentage.of.African.children._se"
## [13] "X.IMD.education.average.score._se"
## [14] "X.IMD.health.average.score._se"
## [15] "X.IMD.IDACI.average.score._se"
## [16] "gwr.e"
## [17] "pred"
## [18] "pred.se"
## [19] "localR2"
## [20] "X.Intercept._se_EDF"
## [21] "X.Percentage.of.Mixed.children._se_EDF"
## [22] "X.Percentage.of.Asian.children._se_EDF"
## [23] "X.Percentage.of.African.children._se_EDF"
## [24] "X.IMD.education.average.score._se_EDF"
## [25] "X.IMD.health.average.score._se_EDF"
## [26] "X.IMD.IDACI.average.score._se_EDF"
## [27] "pred.se.1"
## [28] "coord.x"
## [29] "coord.y"
ObesityMAP@data$coefr2<-results$localR2
ObesityMAP@data$coefmixed<-results$X.Percentage.of.Mixed.children.
ObesityMAP@data$coefafrican<-results$X.Percentage.of.African.children.
ObesityMAP@data$coefedu<-results$X.IMD.education.average.score.
ObesityMAP@data$coefhealth<-results$X.IMD.health.average.score.
ObesityMAP@data$coefasian<-results$X.Percentage.of.Asian.children.
ObesityMAP@data$coefidaci<-results$X.IMD.IDACI.average.score.
#Local R-squared
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(ObesityMAP) +
tm_polygons(col = "coefr2", title = "Local R-squared", breaks = c(-Inf,0.7,0.75,0.8,0.85,0.9,0.95, Inf), palette = "YlGnBu", alpha = 1)+
tm_format_World(title = NA,legend.width = 0.8, scale = 1,
legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(ObesityMAP) +
tm_polygons(col = "coefafrican", title = "Coefficient for percentage of African children", palette = "RdBu", alpha = 1)+
tm_format_World(title = NA,legend.width = 0.8, scale = 1,
legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefafrican" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
#Percentage of mixed children
tm_shape(ObesityMAP) +
tm_polygons(col = "coefmixed", title = "Coefficient for percentage of Mixed children",breaks = c(-Inf,-20,-10,-5,0,5,10,15, Inf), palette = "RdBu", alpha = 1)+
tm_format_World(title = NA,legend.width = 0.8, scale = 1,
legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefmixed" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
#Percentage of Asian children
tm_shape(ObesityMAP) +
tm_polygons(col = "coefasian", title = "Coefficient for percentage of Asian children", palette = "RdBu", alpha = 1)+
tm_format_World(title = NA,legend.width = 0.8, scale = 1,
legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefasian" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
#IDACI score
tm_shape(ObesityMAP) +
tm_polygons(col = "coefidaci", title = "Coefficient for IMD IDACI average acore", breaks = c(-Inf,-2,0,2,4,6,8, Inf), palette = "RdBu", alpha = 1)+
tm_format_World(title = NA,legend.width = 0.8, scale = 1,
legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefidaci" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
#IMD Health score
tm_shape(ObesityMAP) +
tm_polygons(col = "coefhealth", title = "Coefficient for IMD Health average acore", palette = "RdBu", alpha = 1)+
tm_format_World(title = NA,legend.width = 0.8, scale = 1,
legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefhealth" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
#IMD education
tm_shape(ObesityMAP) +
tm_polygons(col = "coefedu", title = "Coefficient for IMD Education average acore", palette = "RdBu", alpha = 1)+
tm_format_World(title = NA,legend.width = 0.8, scale = 1,
legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefedu" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
#render("Report code.Rmd", output_dir = output_dir, params = list(output_dir = output_dir))
modeldata2<- childdata2[,c("GSS_CODE","root_ob","Percentage of Mixed children","Percentage of Asian children","Percentage of African children",
"IMD education average score","IMD health average score","IMD IDACI average score")]
clusterdata<- childdata2[,c("root_ob","Percentage of Mixed children","Percentage of Asian children","Percentage of African children",
"IMD education average score","IMD health average score","IMD IDACI average score")]
value <- colnames(clusterdata)
# creates a new data frame
stand_data <- clusterdata
for(i in 1: ncol (clusterdata)){
stand_data[, value[i]] <- scale(as.numeric(modeldata2[, value[i]]))
}
#K-means clustering (k=5)
Km <- kmeans(stand_data, 5, nstart = 25, iter.max = 1000)
KmClusters <- as.matrix(Km$cluster)
KmClusters <- as.data.frame(KmClusters)
KmCenters <- as.matrix(Km$centers)
KmCenters <- as.data.frame(KmCenters)
#Number of LAs in each cluster
table(KmClusters)
## KmClusters
## 1 2 3 4 5
## 80 109 80 21 25
#Validate choice of k
# Total within-cluster sum of square
wss <- NULL
for (i in 1:15) wss[i] <- kmeans(stand_data,centers = i,iter.max = 1000)$tot.withinss
plot(1:15, wss, type = "b", pch = 19, xlab = "Number of Clusters",
ylab = "Total within-cluster sum of squares")
#Cluster plot of the first 2 principal components
library(cluster)
clusplot(stand_data,Km$cluster, color = TRUE, shade = FALSE,
labels = 4, lines = 0, plotchar = FALSE)
#More cluster plot
library(ggplot2)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
fviz_cluster(Km, data = stand_data, geom = "point", ellipse = F, pointsize = 0.5,
ggtheme = theme_classic())
#Radial plot for each group
library(plotrix)
# creates a object of 5 zeros (5 groups)
KmCenters[5,]<- c(0)
par(cex.axis = 0.8, cex.lab = 0.8)
radial.plot(KmCenters[c(1,5),], labels = colnames(KmCenters),
boxed.radial = FALSE, show.radial.grid = TRUE,
line.col = c("blue", "red"), radlab = TRUE,
rp.type = "p", show.grid.labels = 3)
# Creates a map of cluster groups
#Join the cluster labels to the GSS codes
GSScode<- childdata2[,c(1,2)]
Classification <- as.data.frame(cbind(as.character(GSScode[,1]), KmClusters[,1]))
names(Classification) <- c("GSS_CODE", "Classification")
OA.Class<- merge(ObesityMAP, Classification, by.x = "GSS_CODE", by.y = "GSS_CODE",duplicateGeoms = TRUE)
# creates a map in R
tm_shape(OA.Class) +
tm_polygons(col = "Classification", title = "Cluster groups", palette = "RdBu", alpha = 1)+
tm_format_World(title = NA,legend.width = 0.8, scale = 1,
legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Warning in tm_format_World(title = NA, legend.width = 0.8, scale = 1,
## legend.position = c("left", : tm_format_World is deprecated as of tmap version
## 2.0. Please use tm_format("World", ...) instead
## Compass not supported in view mode.
## Warning: The shape OA.Class is invalid. See sf::st_is_valid
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.